Beyond Matplotlib and Seaborn¶

A survey of Python data visualization tools¶

Stephanie Kirmer
@data_stephanie
https://github.com/skirmer/new-py-dataviz

In [2]:
# Matplotlib baseline: 50-bin histogram of the acousticness column.
fig, ax = plt.subplots(figsize=(12, 6))
n, bins, patches = ax.hist(dataset["acousticness"], bins=50)

# Label the axes in one call and switch the grid off.
ax.set(xlabel='Acousticness', title='Histogram of Acousticness')
ax.grid(False)

plt.show()

In [4]:
# Seaborn version of the acousticness histogram.
#
# The figure/axes must be created *inside* the style context: axes_style()
# only affects axes created while the context manager is active, so building
# them beforehand would silently ignore the "whitegrid" style.
with sns.axes_style("whitegrid"):
    fig, ax = plt.subplots(figsize=(12, 6))
    viz = sns.histplot(data=dataset, x="acousticness", binwidth=.02, ax=ax)
    viz.set_title("Histogram of Acousticness")
    viz.set_xlabel('Acousticness')

In [6]:
# Render Bokeh output inline in the notebook.
output_notebook()

# Bin the data up front with NumPy; the bars are drawn from the bin edges.
hist, edges = np.histogram(dataset.acousticness, bins=50)

p = figure(
    title="Histogram of Acousticness",
    x_axis_label='Acousticness',
    y_axis_label='Count',
    width=750,
    height=400,
)

# One rectangle per bin: consecutive edges give left/right, counts give top.
p.quad(
    left=edges[:-1],
    right=edges[1:],
    bottom=0,
    top=hist,
)

show(p)
Loading BokehJS ...

Altair¶

In [9]:
# Altair histogram of acousticness on a 5,000-row sample of the dataset.
# NOTE(review): the sample size presumably keeps the chart within Altair's
# default row limit -- confirm.
# A fixed random_state makes the sample (and therefore the rendered chart)
# identical on every Restart & Run All.
source = dataset.sample(n=5000, random_state=42)

viz = (
    alt.Chart(source)
    .mark_bar()
    .encode(alt.X("acousticness", bin=True), y='count()')
    .properties(title='Histogram of Acousticness', width=700, height=300)
)
viz
Out[9]:

In [10]:
# Rendering options set on `pno` before the plot is built.
pno.dpi = 150
pno.figure_size = (6, 3)

# Plotnine histogram: layers are combined with `+`; the parenthesised
# expression is the cell's last value, so the plot is displayed.
(
    ggplot(data=dataset, mapping=aes(x='acousticness'))
    + theme_bw(base_size=6)
    + geom_histogram(color='white', fill='darkblue', bins=50)
    + labs(title="Histogram of Acousticness")
)
Out[10]:
<ggplot: (325799316)>

Plotly Express

In [11]:
# Plotly Express histogram of acousticness with the white template.
fig = px.histogram(
    dataset,
    x="acousticness",
    nbins=50,
    title="Histogram of Acousticness",
    template='plotly_white',
)

# Fix the canvas size and tighten the margins around the plot area.
fig.update_layout(
    width=700,
    height=400,
    margin={"l": 15, "r": 25, "b": 15, "t": 40, "pad": 1},
)

fig.show()

Best-Of Winners¶

Histogram: Plotnine¶

In [13]:
# Same plotnine histogram as shown earlier, repeated as the category winner.
(
    ggplot(data=dataset, mapping=aes(x='acousticness'))
    + theme_bw(base_size=6)
    + geom_histogram(color='white', fill='darkblue', bins=50)
    + labs(title="Histogram of Acousticness")
)
Out[13]:
<ggplot: (325848519)>

Plain Scatter: Bokeh¶

In [17]:
# Render Bokeh output inline in the notebook.
output_notebook()

# Empty canvas for the scatterplot; the glyphs are added in the next cell.
p = figure(
    width=750,
    height=400,
    title="Acousticness x Loudness Scatterplot",
    x_axis_label="Acousticness",
    y_axis_label="Loudness",
)
Loading BokehJS ...
In [18]:
# Draw one semi-transparent circle per row on the figure built in the
# previous cell, then display it.
p.scatter(
    x=dataset["acousticness"],
    y=dataset["loudness"],
    marker="circle",
    size=5,
    fill_color="#2b4570",
    fill_alpha=0.75,
    line_color="#97b5e6",
)

show(p)

Faceted Scatter: Plotnine/Plotly tie¶

Plotnine and Plotly Express

In [19]:
# Plotnine scatter of acousticness vs. loudness, one stacked panel per value
# of `explicit` (single column, facet labels show both name and value).
(
    ggplot(data=dataset, mapping=aes(x='acousticness', y='loudness'))
    + facet_wrap('explicit', ncol=1, labeller='label_both')
    + theme_bw(base_size=6)
    + geom_point(size=.5, fill='#2b4570', alpha=.75, color="#97b5e6")
    + labs(title="Acousticness x Loudness Scatterplot")
)
Out[19]:
<ggplot: (326435244)>
In [20]:
# Plotly Express scatter of acousticness vs. loudness, with one facet row per
# value of `explicit`.
fig = px.scatter(
    dataset,
    x="acousticness",
    y='loudness',
    facet_row="explicit",
    title="Acousticness x Loudness Scatterplot",
    template='plotly_white',
)

# Fix the canvas size and tighten the margins around the plot area.
fig.update_layout(
    width=700,
    height=400,
    margin={"l": 15, "r": 25, "b": 15, "t": 40, "pad": 1},
)

fig.show()

Grouped Bar: Altair¶

In [22]:
# Altair bar chart: mean danceability per group, colored by group.
source = sample

viz = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        x='group:O',
        y='mean(danceability):Q',
        color='group:N',
    )
    .properties(
        title='Danceability, Beyonce vs Beatles',
        width=600,
        height=300,
    )
)

viz
Out[22]:

Timeline: Bokeh¶

In [37]:
# Bokeh timeline of danceability by release date, using a datetime x-axis.
p = figure(
    title="Danceability by Release Date",
    y_axis_label='Danceability',
    x_axis_label='Release Date',
    width=700,
    x_axis_type='datetime',
    height=400,
)

# One line per summary column ('danceability mean' / 'min' / 'max').
# Each line gets a legend label so the three series can be told apart;
# without a legend the reader has to guess which color is which statistic.
for stat, line_color, line_alpha in [
    ('mean', 'blue', .75),
    ('min', 'red', .25),
    ('max', 'green', .25),
]:
    p.line(
        x=grouped_sample['release_date'],
        y=grouped_sample[f'danceability {stat}'],
        color=line_color,
        alpha=line_alpha,
        legend_label=stat,
    )

show(p)

3D Scatter: Plotly¶

Graph Objects

In [33]:
# `layout` is used by the following cell to build the 3D scene axes.
# The unused legacy XAxis/YAxis names are no longer imported.
from plotly.graph_objects import layout

# Downsample to 10,000 rows for the 3D plot; the fixed random_state makes the
# same points appear on every Restart & Run All.
dataset_sm = dataset.sample(n=10000, random_state=42)

x = dataset_sm.acousticness
y = dataset_sm.loudness
z = dataset_sm.danceability
col = dataset_sm.explicit

# One marker per sampled row, colored by the `explicit` column through the
# Viridis colorscale.
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=x,
            y=y,
            z=z,
            mode='markers',
            marker=dict(
                size=2,
                color=np.array(col),
                colorscale='Viridis',
                opacity=0.8,
            ),
        )
    ]
)
In [34]:
# Title, template, canvas size, margins and the three labelled scene axes for
# the 3D scatter built in the previous cell.
fig.update_layout(
    title="Acousticness x Loudness x Danceability Scatterplot",
    template='plotly_white',
    autosize=True,
    width=500,
    height=500,
    margin={"l": 1, "r": 1, "b": 40, "t": 45, "pad": 1},
    scene=layout.Scene(
        xaxis=layout.scene.XAxis(title='Acousticness'),
        yaxis=layout.scene.YAxis(title='Loudness'),
        zaxis=layout.scene.ZAxis(title='Danceability'),
    ),
)

fig.show()
In [ ]: